last updated: August 30 2021

These are the figures that we are targeting for submission to the Journal of Dental Resesarch.

Data were collected from 152 participants who were screened into 2 cohorts: the low flow cohort (N = 33) consisted of individuals with a presumptive diagnosis of the autoimmune disorder Sjögren’s Syndrome (SS) while the control cohort (N = 119) consisted of otherwise healthy controls. The complete set of inclusion and exclusion criteria for each cohort is provided as supplementary data (Supplementary Table 1).

Gender identity, race, and ethnicity did not significantly differ between the control and low flow cohorts (Chisquare-tests, p > 0.1) (Supplementary Table 2). The majority of subjects in both cohorts were White or Asian and did not identify as Hispanic or Latino. A substantial sex bias was seen in both cohorts: roughly 62% of the control cohort identified as female while 93% of the low flow cohort identified as female, consistent with the known sex bias in the occurrence of Sjogren Syndrome

Things to do

Flow rate for 39 came from Day 1 not the ESE; the missing birthdays were recovered from REDCap track changes since Ava and Danielle made undocumented deletions of the provided birthdays.

#load required packages
library("easypackages")
#define packages
libraries("tidyverse",
          "phyloseq",
           "genefilter",
            "plyr",
            "knitr",
            "modelr",
            "gridExtra",
            "reshape2",
            "purrr",
            "stringr",
            "ggrepel",
            "kableExtra",
            "tidyverse",
            "cowplot",
            "broom",
            "ggplot2",
            "RColorBrewer",
            "viridis",
            "ggpubr",
            "dplyr",
            "ggpmisc",
            "ggfortify", 
            "FactoMineR",
            "factoextra",
            "colorspace",
            "cowplot",
            "png",
            "sjPlot",
             "bestNormalize",
            "lmtest",
            "effects",
            "dabestr",
            "vegan",
            "DESeq2",
            "caret", 
            'party', 
            'randomForest', 
            'rfPermute', '
            rpart.plot', 
          'wesanderson')

set.seed(673)
ggplot2::theme_set(ggplot2::theme_classic())

https://stats.idre.ucla.edu/r/dae/ordinal-logistic-regression/

#read in data about pids and cohort
pids <- read.csv(file="~/Dropbox/oral-clinical-data-analysis-data/cohort_20191019.csv") #152 row

#read in age
age.df = read.csv("~/Dropbox/oral-clinical-data-analysis-data/age_df.csv") #152 rows
age.df$X = NULL
age.df= plyr::join(age.df, pids)

#read in flow rates
uwsfr = read.csv("~/Dropbox/oral-clinical-data-analysis-data/uwsfr_df.csv") #152 rows
uwsfr$X = NULL
uwsfr= plyr::join(uwsfr, pids)

#read the coordinates
map <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/mytoothdot_coordinates_FullMouth.csv")
map <- na.omit(map)
colnames(map)[4] <- "tooth"
map$X.SampleID = NULL


#read in the caries data
caries.data <- read.csv(file="~/Dropbox/oral-clinical-data-analysis-data/caries_data_v3.0.csv")
caries.df = plyr::join(caries.data, uwsfr)
caries.df = plyr::join(caries.df, age.df)
caries_map = plyr::join(caries.df, map, by="tooth")


#read in the perio data and create a uwsfr subset
perio.data <- read.csv(file="~/Dropbox/oral-clinical-data-analysis-data/periodontal_data_v2.0.csv")
perio.df = plyr::join(perio.data, uwsfr)
perio.df = plyr::join(perio.df, age.df)
perio_map = plyr::join(perio.df, map, by="tooth")


#read in an image of the mouth
#create color so that 0 is midpoint and set as white
#load and setup background mouth diagram
img <- readPNG("~/Dropbox/mouth_smaller.png")
g<- grid::rasterGrob(img, interpolate=TRUE, height = unit(.95,"npc"), width = unit(.9, "npc"))


#transformations
caries_map$DMFS_orderNorm  =orderNorm(caries_map$DMFS.bytooth)$x.t
perio_map$cal_orderNorm   = orderNorm(perio_map$subject.tooth.mean.cal)$x.t #normal
perio_map$pd_orderNorm   = orderNorm(perio_map$subject.tooth.mean.pd)$x.t #normal
perio_map$gmcej_orderNorm =orderNorm(perio_map$subject.tooth.mean.gm.cej)$x.t #normal
perio_map$bop_orderNorm  =orderNorm(perio_map$bop)$x.t

Figure 1: Description of the population flow rates and age

Figure 1: Gardner-Altman estimation plots reveal significant demographic and clinical differences between low flow and control cohorts. Data for control and low flow cohort patient Swarm plots of a) uws-fr, b) age, c) dmfs, d) cal and e) gm.cej, f) bop, and g) PD are plotted are plotted on the left-aligned axis. The right-aligned axis reveals the point estimate of the unpaired mean difference between groups, surrounded by their 95% confidence intervals (95% CI).

These data indicate that while probing depths are trend towards higher average PDs in the low flow cohort compared to the controls. BOP did not appear to differ between the two groups.

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Figure1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#define a function to summarize the clinical data
summarize_clinical_feature <- function(df, vars) {
  mydf <- df %>%
  ungroup(.) %>%
  dplyr::select(vars) %>%
  unique(.) %>%
  na.omit(.)  
  return(mydf)
}

#### Plot uws-fr
uwsfr$complete = complete.cases(uwsfr$uwsfr)
uwsfr$`UWS-FR (mL/min)` = uwsfr$uwsfr
uwsfr = subset(uwsfr, complete==TRUE)
uws.fr.plot <- 
  uwsfr %>%
  dabest(cohort, `UWS-FR (mL/min)`, 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()
Fig1A = plot(uws.fr.plot, axes.title.fontsize = 12)

#get the mean uws-fr
#detach(package:plyr)
#out = uwsfr %>%
#  group_by(cohort) %>%
#  summarize(mean_uws = mean(uwsfr, na.rm = TRUE))
#out



#plot age
age.df$`Age (Years)` = age.df$age
age.df = subset(age.df, redcap !=268) # drop 268 since we don't have clinical data for this subject
age.plot <- 
  age.df %>%
  dabest(cohort, `Age (Years)` , 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()

Fig1B = plot(age.plot, axes.title.fontsize = 12)

#get the mean ages by cohort
#out = age.df %>%
#  group_by(cohort) %>%
#  summarize(mean_age = mean(age, na.rm = TRUE))
#out

age.sub = unique(age.df$redcap)

#get the proportion of controls who were over the age of 40 (N=23; 19.33%)
over40c = subset(age.df, age > 40 & cohort=="Control")
c = subset(age.df, cohort=="Control")
nrow(over40c)/nrow(c)
## [1] 0.1932773
#plot dmfs
caries_map <- filter(caries_map, tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)
perio_map <- filter(perio_map, tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)


#This subfigure compares overall DMFS score per subject between cohorts
dmfs.df <- summarize_clinical_feature(df=caries_map, vars=c("redcap", "aim", "DMFS_orderNorm", "cohort"))
dmfs.df2 <- summarize_clinical_feature(df=caries_map, vars=c("redcap", "aim", "DMFS.bysubject", "cohort"))
dmfs.df2$DMFS = dmfs.df2$DMFS.bysubject
dmfs.plot <- 
  dmfs.df2 %>%
  dabest(cohort, DMFS, 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()

dmfs.sub = unique(dmfs.df2$redcap)
Fig1C = plot(dmfs.plot, axes.title.fontsize = 12)

#get the average dmfs by cohort
#out = dmfs.df2 %>%
#  group_by(cohort) %>%
#  summarize(mean_dmfs = mean(DMFS.bysubject, na.rm = TRUE))
#out


#it looks like there's a subset with low DMFS, the subset has fewer than 15; when we subset on these, they have a lower average age than the individuals in the upper group
dfms.low = subset(dmfs.df2, DMFS.bysubject  < 15)
dfms.low.subjects = dfms.low$redcap
ages_with_low_dmfs = subset(age.df, redcap %in% dfms.low.subjects)
ages_with_higher_dmfs = subset(age.df, !(redcap %in% dfms.low.subjects))


#plot cal
cal.df = summarize_clinical_feature(perio_map, vars=c("redcap", "aim", "mean.cal", "cohort"))
cal.df$`Mean CAL` =cal.df$mean.cal
cal.plot <- 
  cal.df %>%
  dabest(cohort, `Mean CAL`, 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()

Fig1D = plot(cal.plot, axes.title.fontsize = 12)

#CAL by group
#out = cal.df %>%
#  group_by(cohort) %>%
#  summarize(mean_cal = mean(mean.cal, na.rm = TRUE))
#out
#plot PD
pd.df <- summarize_clinical_feature(df=perio_map, vars=c("redcap", "aim", "subject.mean.pd", "cohort"))
pd.df$`Mean PD` = pd.df$subject.mean.pd
pd.plot <- 
  pd.df %>%
  dabest(cohort, `Mean PD`, 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()

Fig1E = plot(pd.plot, axes.title.fontsize = 12)



#plot gmcej between cohorts.
gmcej.df <- summarize_clinical_feature(df=perio_map, vars=c("redcap", "aim", "subject.mean.gm.cej", "cohort"))
gmcej.df$`Mean GM-CEJ` = gmcej.df$subject.mean.gm.cej
gmcej.plot <- 
  gmcej.df %>%
  dabest(cohort, `Mean GM-CEJ`, 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()

Fig1F = plot(gmcej.plot, axes.title.fontsize = 12)

#CAL by group
#out = gmcej.df %>%
#  group_by(cohort) %>%
#  summarize(mean_gm = mean(subject.mean.gm.cej, na.rm = TRUE))
#out


#plot bop
bop.df <- summarize_clinical_feature(df=perio_map, vars=c("redcap", "aim", "percent.bop", "cohort"))
bop.df$`Percent BOP` = bop.df$percent.bop
bop.plot <- 
  bop.df %>%
  dabest(cohort, `Percent BOP`, 
         idx = c("Control", "Low Flow"), 
         paired = FALSE)%>% 
                      mean_diff()

Fig1G = plot(bop.plot, axes.title.fontsize = 12) 

#bop by group
#out = bop.df %>%
#  group_by(cohort) %>%
#  summarize(mean_bop = mean(percent.bop, na.rm = TRUE))
#out
##### Make Figure 1
Figure1 <- cowplot::plot_grid(          
          Fig1A + theme(legend.position = "none", axis.text=element_text(size=12)),
          Fig1B + theme(legend.position = "none"),
          Fig1C + theme(legend.position = "none"),
          Fig1D + theme(legend.position = "none"),
          Fig1F + theme(legend.position = "none"),
          Fig1G + theme(legend.position = "none"),
          Fig1E + theme(legend.position = "none"),
          labels = "auto", ncol = 2)
Figure1

ggsave(Figure1, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure1.png", width=10, height=12, units="in", dpi=300)

How many “control” subjects have flow rates less than 0.3?

#how many controls have flow rates less than 0.3
control_N0.3 = subset(uwsfr, aim %in% c(1, 2)) %>%
  subset(., uwsfr <= 0.3) #35

#how many have flow rates less than 0.2
control_N0.2 = subset(uwsfr, aim %in% c(1, 2)) %>%
  subset(., uwsfr <= 0.2) #18

control_N0.1 = subset(uwsfr, aim %in% c(1, 2)) %>%
  subset(., uwsfr <= 0.1) #3

Supplementary Table 1: inclusion and exclusion criteria

Note that these are drawn from the final version of the consent forms.

![Supplementary Table 1: Inclusion and Exclusion Criteria.](/Users/dmap/Dropbox/oral-clinical-data-analysis-ms/Inclusion_exclusion.png)

Supplementary Table 2: Gender identity, race, and ethnicity does not differ between cohorts

Chi-square tests indicate that the distribution of patients in each cohort didn’t differ with respect to gender, race, or ethnicity.

#![Supplementary Table 2: Demographic #Information](/Users/dmap/Dropbox/oral-clinical-data-analysis-ms/demographic.png){width=65%}
#read in the race data and perform chi-square tets
race.df = read.csv("~/Dropbox/oral-clinical-data-analysis-data/race_df.csv")[1:7,1:3]
chisq.test(race.df$Controls, race.df$Low.Flow)
## 
##  Pearson's Chi-squared test
## 
## data:  race.df$Controls and race.df$Low.Flow
## X-squared = 28, df = 24, p-value = 0.26
#read in ethnicity data and do chi-sq tests 
ethnic.df <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/ethnicity_df.csv")[1:3,1:3]
chisq.test(ethnic.df$Control, ethnic.df$Low.Flow)
## 
##  Pearson's Chi-squared test
## 
## data:  ethnic.df$Control and ethnic.df$Low.Flow
## X-squared = 6, df = 4, p-value = 0.1991
#read in gender and do chi-square tetst
gender.df <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/gender_df.csv")[1:5,1:3]
chisq.test(gender.df$Control, gender.df$Low.Flow)
## 
##  Pearson's Chi-squared test
## 
## data:  gender.df$Control and gender.df$Low.Flow
## X-squared = 7.5, df = 6, p-value = 0.2771

Figure 2: Spatial model

A linear model was applied to data subsetted by universal tooth number excluding wisdom teeth (Teeth# 1,16,17,32). The response variables used were DMFS Score, CAL, PD, GM-CEJ, and BOP. The predictor variables used were Age, Cohort, UWS-FR, and Cohort:UWS-FR. The coefficients and confidence intervals for each tooth was plotted for each linear model.

  1. The DMFS Score response variable had higher regression coefficients in the posterior teeth (Teeth #’s 2-5, 12-15, 18-21, 28-31) when compared to the anterior teeth (Teeth #’s 6-11, 22-27) with respect to the Age predictor variable. The coefficients resulting from the effect of the Cohort predictor variable were highest in teeth 2-5 and 12. All of the coefficients of the maxillary teeth (Teeth# 2-15) and their respective confidence intervals were found to be greater than 0. Only two teeth (Teeth #’s 18, 29) had coefficients and confidence intervals, resulting from the UWS-FR effect, that did not include 0. The tooth 18 coefficient was less than 0, while the tooth 29 coefficient was greater than 0. The interaction of Cohort and UWS-FR resulted in having positive coefficients, whos confidence intervals did not overlap 0, only for teeth 19 and 30. Ten teeth in the maxilla (Teeth #’s 2, 4, 5, 7-12) and 8 in the mandible (Teeth #’s 15, 20-22, and 25-29) all had negative regression coefficients whose confidence intervals did not overlap 0.
Let’s see up the shared elements of the linear models
#set up the vector for teeth
tooth.model = c(rep(2:15, each=5), rep(18:31, each=5))

#Add tooth class to dataset.
define_tooth_class <- function(df){
      df$tooth.class <- ifelse(df$tooth %in% c(1:3, 14:19,30:32), "Molar", 
      ifelse(df$tooth %in% c(4:5, 12:13, 20:21, 28:29), "Pre.Molar", 
          ifelse(df$tooth %in% c(6, 11, 22, 27), "Canine", 
              ifelse(df$tooth %in% c(7, 10, 23, 26), "Lateral.Incisor", 
                  ifelse(df$tooth %in% c(8, 9, 24, 25), "Central.Incisor", "error")))))
    return(df)
}

#This function gets the per-tooth regression coefficients and 95% confidence intervals
# we also get the pvalues
get_spltmap_out <- function(df, myvector, model){
  conf <- df %>%
    split(., myvector) %>%
    map(~ lm(model, data = .)) %>%
    map_df(~  tidy(., conf.int = TRUE, conf.level = 0.95))
  df = data.frame(conf) 
  df$tooth <- tooth.model
  df$term <- gsub("cohortSjögren's", "Cohort", df$term)
  df$term <- gsub("age", "Age", df$term)
  df$term <- gsub("cohortLow Flow", "Cohort", df$term)
  df$term <- gsub("cohort:uwsfr", "Cohort:UWS-FR", df$term)
  df$term <- gsub("uwsfr", "UWS-FR", df$term)
  df = subset(df, term !="(Intercept)")  
  return(df)
}

### set up tthe plot for age while specifying the model
# ylim(-0.35, 0.7) 
plot_age <- function(df, model){
  p <- ggplot(filter(df, term == "Age"), aes(x = tooth, y = estimate, color = tooth.class, shape=Significance)) +
    geom_point(size=3) +
    geom_hline(yintercept = 0, color = "gray") +
    geom_vline(xintercept = 16.5, color = "gray", linetype="dotted") +
    geom_errorbar(aes(ymin = conf.low , ymax = conf.high)) +
    coord_flip() +
    ggtitle(paste0(model, " ", "~ Age")) +
    labs(x = "Universal Tooth Number", y = "Young <--> Old", color = "Tooth Class") 
}

#set up plot for term cohort and model
 #+ylim(-1.6, 1.7)
plot_cohort <- function(df, model){
  p <- ggplot(filter(df, term == "cohort"), aes(x = tooth, y = estimate, color = tooth.class, shape=Significance)) +
    geom_point(size=3) +
    geom_hline(yintercept = 0, color = "gray") +
    geom_vline(xintercept = 16.5, color = "gray", linetype="dotted") +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) +
    coord_flip() +
    ggtitle(paste0(model, " ", "~ Cohort")) +
    labs(x = "Universal Tooth Number", y = "Control <--> Sjogrens", color = "Tooth Class")
}

#plot the term uwsfr and the model
#+ylim(-1.1, 1)
plot_flow <- function(df, model) {
  p  <- ggplot(filter(df, term == "UWS-FR"), aes(x = tooth, y = estimate, color = tooth.class, shape=Significance)) +
    geom_point(size=3) +
    geom_hline(yintercept = 0, color = "gray") +
    geom_vline(xintercept = 16.5, color = "gray", linetype="dotted") +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) +
    coord_flip() +
    ggtitle(paste0(model, " ", "~ UWS-FR")) +
    labs(x = "Universal Tooth Number", y = "Low UWS-FR <--> High UWS-FR", color = "Tooth Class") 
}

#plot the interaction
#ylim(-4, 4)
plot_interaction <- function(df, model) {
  p <- ggplot(filter(df, term == "Cohort:UWS-FR"), aes(x = tooth, y = estimate, color = tooth.class, shape=Significance)) +
    geom_point(size=3) +
    geom_hline(yintercept = 0, color = "gray") +
    geom_vline(xintercept = 16.5, color = "gray", linetype="dotted") +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) +
    coord_flip() +
    ggtitle(paste0(model, " ", "~ Cohort:UWS-FR")) +
    labs(x = "Universal Tooth Number", y = "Low UWS-FR <--> High UWS-FR", color = "Tooth Class") 
}

Look at DMFS

Compared to those in the low flow cohort, control subjects had fewer caries at several anterior teeth in the maxilla (teeth 6, 8, 11) and mandible (teeth 22, 23, 24, 25), as indicated by the negative regression coefficients (adj.p < 0.05). In addition, while both groups tended to have more caries at posterior sites low flow subjects tended to have significantly more at several maxillary (3, 5, 14) and mandibular molars and pre-molars (18, 19, 21, 28, 30, 31) compared to the control, as indicated by the positive regression coefficients.

#For teeth 7, 8, 22, and 27 the error 0 or 1 probability occurred; 
tooth.model = tooth.model[!(tooth.model %in% c(1, 16, 17, 32))]
caries.df <- dplyr::select(caries_map, c("DMFS.bytooth", "cohort", "age", "uwsfr", "tooth")) %>%
  filter(., !(tooth %in% c(1, 16, 17, 32) ))

keep = dplyr::select(caries.df, c("DMFS.bytooth",
                                            "tooth"))
caries.df$cohort = plyr:::revalue(caries.df$cohort, c("Control"=0, "Low Flow"=1))
caries.df$cohort = as.numeric(as.character(caries.df$cohort))


caries.df.scaled = data.frame(scale(caries.df[,2:4], center=TRUE, scale=TRUE))
caries.df.scaled = data.frame(cbind(keep), caries.df.scaled)
caries.df.scaled$dmfs_orderNorm   = orderNorm(caries.df.scaled$DMFS.bytooth)$x.t #normal

dmfs.model = 'dmfs_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
df = get_spltmap_out(df=caries.df.scaled, 
                        myvector=as.vector(caries.df.scaled$tooth), 
                        model=dmfs.model)
                        df = define_tooth_class(df)


#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")

#combine coefs and confs and filter out intercept
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the DMFS Model
Fig2A = plot_age(df, model="DMFS")

age.sig = subset(df, p.adj < 0.05 & term=="Age") # age impacts all teeth
age.sig$Significant = ifelse(age.sig$p.value <0.05, "Yes","No")

Fig2B = plot_cohort(df, model="DMFS")
dfms.sig = subset(df, p.adj < 0.05 & term=="cohort") # age cohort primarily affects the upper jaw! incisors/canines in lower jaw
#on average difference between anterior to posterior
ant = subset(df, tooth.class %in% c("Canine", "Central.Incisor", "Lateral.Incisor" ) & term=="cohort" )
post = subset(df, tooth.class %in% c("Molar",       "Pre.Molar" ) & term=="cohort" )
mean(ant$estimate)/mean(post$estimate)
## [1] -0.4645941
neg = subset(dfms.sig, estimate < 0)
sigPos = subset(dfms.sig, p.adj < 0.05 & estimate >-0.04 )

Fig2C = plot_flow(df, model="DMFS")
uws.sig = subset(df,p.adj < 0.05 & term=="UWS-FR") # 11 teeth


Fig2D = plot_interaction(df, model="DMFS")
int.sig = subset(df, p.adj < 0.05 & term=="Cohort:UWS-FR") # 11 teeth


#
p = subset(caries_map, cohort %in% c("Control", "Low Flow")) %>%
    ggplot(., aes(uwsfr , DMFS.bysubject , color=as.factor(cohort)), group=as.factor(cohort)) + 
      geom_point() + geom_smooth(method = lm) 



p1 = subset(caries_map, cohort %in% c("Control", "Low Flow")) %>%
  subset(., tooth %in% c(6, 8, 11,  22, 23, 24, 25)) %>%
  ggplot(., aes(as.factor(tooth), DMFS_orderNorm, color=cohort, ncol=2)) + geom_boxplot()+ 
  facet_wrap(~tooth, scales="free") +
  stat_compare_means() + ggtitle("Negative Coef") + geom_point()


p2 = subset(caries_map, cohort %in% c("Control", "Low Flow")) %>%
  subset(., tooth %in% c(3, 5, 14, 18, 19, 21, 28, 30, 31)) %>%
  ggplot(., aes(as.factor(tooth), DMFS_orderNorm, color=cohort)) + geom_boxplot()+ 
  facet_wrap(~tooth, scales="free") +
  stat_compare_means()+ ggtitle("Positive Coef")+ geom_point()
grid.arrange(p1, p2, ncol=2)

fm = subset(caries_map, tooth %in% c(3, 5, 14, 18, 19, 21, 28, 30, 31)) 
fm = subset(fm, cohort %in% c("Low Flow", "Control"))
fm1 = doBy::summaryBy(DMFS.bytooth~tooth+cohort, FUN=mean, data=fm)
p = ggplot(fm1, aes(cohort, DMFS.bytooth.mean)) + facet_wrap(~tooth) + geom_point()

fm = subset(caries_map, tooth %in% c(6, 8, 11,  22, 23, 24, 25)) 
fm = subset(fm, cohort %in% c("Low Flow", "Control"))
fm1 = doBy::summaryBy(DMFS.bytooth~tooth+cohort, FUN=mean, data=fm)
p2 = ggplot(fm1, aes(cohort, DMFS.bytooth.mean)) + facet_wrap(~tooth) + geom_point()

Set up CAL model

plot_cohort <- function(df, model){
  p <- ggplot(filter(df, term == "Cohort"), aes(x = tooth, y = estimate, color = tooth.class, shape=Significance)) +
    geom_point(size=3) +
    geom_hline(yintercept = 0, color = "gray") +
    geom_vline(xintercept = 16.5, color = "gray", linetype="dotted") +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high)) +
    coord_flip() +
    ggtitle(paste0(model, " ", "~ Cohort")) +
    labs(x = "Universal Tooth Number", y = "Control <--> Sjogrens", color = "Tooth Class")
}

perio.df <- dplyr::select(perio_map, c("cal", "pd","gm.cej","bop",
                                       "cohort", "age", "uwsfr", "tooth")) %>%
            filter(., tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)

#transformations

keep = dplyr::select(perio.df, c("cal",
                              "pd",
                              "gm.cej",
                              "bop",
                              "cohort",
                              "tooth"))
perio.df$cohort = plyr:::revalue(perio.df$cohort, c("Control"=0, "Low Flow"=1))
perio.df$cohort = as.numeric(as.character(perio.df$cohort))
perio.df.scaled = data.frame(scale(perio.df[,5:7], center=TRUE, scale=TRUE))
perio.df.scaled = data.frame(cbind(keep), perio.df.scaled)

perio.df.scaled$cal_orderNorm   = orderNorm(perio.df.scaled$cal)$x.t #normal


cal.model = 'cal_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled, 
                        myvector=as.vector(perio.df$tooth), 
                        model=cal.model)
                        df = define_tooth_class(mycoefficients)

#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
                        
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the CAL Model
Fig2E = plot_age(df, model="CAL")
age.sig = subset(df, p.adj < 0.05 & term=="Age") # age impacts all teeth

Fig2F = plot_cohort(df, model="CAL")
cal.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in lower jaw
antMax = subset(cal.sig, tooth %in% c(6, 7, 8, 9, 10, 11))
min(antMax$estimate)
## [1] 0.4170893
max(antMax$estimate)
## [1] 0.5641276
mean(antMax$estimate)
## [1] 0.4827225
antMan = subset(cal.sig, tooth %in% c(22, 23, 24, 25, 26, 27))
min(antMan$estimate)
## [1] 0.3941972
max(antMan$estimate)
## [1] 0.6943266
mean(antMan$estimate)
## [1] 0.5336167
postMax = subset(cal.sig, tooth %in% c(3, 4, 5, 12, 13, 14))
min(postMax$estimate)
## [1] 0.239865
max(postMax$estimate)
## [1] 0.4848494
mean(postMax$estimate)
## [1] 0.3407975
postMan = subset(cal.sig, tooth %in% c(20, 21, 30))
min(postMan$estimate)
## [1] 0.2638477
max(postMan$estimate)
## [1] 0.5714036
mean(postMan$estimate)
## [1] 0.3998269
Fig2G = plot_flow(df, model="CAL")
uws.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth


Fig2H = plot_interaction(df, model="CAL")
int.sig = subset(df, p.adj < 0.05 & term=="Cohort:UWS-FR") # 11 teeth

Let’s do probing depth

perio.df.scaled$pd_orderNorm   = orderNorm(perio.df.scaled$pd)$x.t

pd.model = 'pd_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled, 
                        myvector=as.vector(perio.df$tooth), 
                        model=pd.model)
df = define_tooth_class(mycoefficients)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
                        

#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the PD Model
Fig2I = plot_age(df, model="PD")
pd.sig = subset(df, p.adj < 0.05 & term=="Age") # age cohort primarily affects the upper jaw! incisors/canines in 

Fig2J = plot_cohort(df, model="PD")
pd.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in 

Fig2K = plot_flow(df, model="PD")
pd.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth


Fig2L = plot_interaction(df, model="PD")

Let’s do gm-cej

perio.df.scaled$gm.cej_orderNorm   = orderNorm(perio.df.scaled$gm.cej)$x.t

pd.model = 'gm.cej_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df.scaled, 
                        myvector=as.vector(perio.df$tooth), 
                        model=pd.model)
                        df = define_tooth_class(mycoefficients)
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")

#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the GM-CEJ Model
Fig2M = plot_age(df, model="GM-CEJ")
Fig2N = plot_cohort(df, model="GM-CEJ")
gm.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in 




antMax = subset(gm.sig, tooth %in% c(7, 10, 12))
min(antMax$estimate)
## [1] 0.3390851
max(antMax$estimate)
## [1] 0.4351341
mean(antMax$estimate)
## [1] 0.3805562
antMan = subset(gm.sig, tooth %in% c(22, 23, 24, 25, 26, 27))
min(antMan$estimate)
## [1] 0.3065823
max(antMan$estimate)
## [1] 0.632668
mean(antMan$estimate)
## [1] 0.5081951
postMax = subset(gm.sig, tooth %in% c(3, 4, 5, 12, 13, 14))
min(postMax$estimate)
## [1] 0.4351341
max(postMax$estimate)
## [1] 0.4351341
mean(postMax$estimate)
## [1] 0.4351341
postMan = subset(gm.sig, tooth %in% c(20, 21, 30))
min(postMan$estimate)
## [1] 0.2404437
max(postMan$estimate)
## [1] 0.4538664
mean(postMan$estimate)
## [1] 0.347155
Fig2O = plot_flow(df, model="GM-CEJ")
gm.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth

Fig2P = plot_interaction(df, model="GM-CEJ")
int.sig = subset(df, p.adj < 0.05 & term=="Cohort:UWS-FR") # 11 teeth

let’s do bop

perio.df$bop_orderNorm   =orderNorm(perio.df$bop)$x.t
bop.model = 'bop_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'

mycoefficients = get_spltmap_out(df=perio.df.scaled, 
                        myvector=as.vector(perio.df$tooth), 
                        model=pd.model)
                        df = define_tooth_class(mycoefficients)

                        
#combine coefs and confs and filter out intercept

#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")

#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the BOP Model
Fig2Q = plot_age(df, model="BOP")
Fig2R = plot_cohort(df, model="BOP")
bop.sig = subset(df, p.adj < 0.05 & term=="Cohort") # age cohort primarily affects the upper jaw! incisors/canines in 

Fig2S = plot_flow(df, model="BOP")
bop.sig = subset(df, p.adj < 0.05 & term=="UWS-FR") # 11 teeth

Fig2T = plot_interaction(df, model="BOP")


p = ggplot(perio.df, aes(bop_orderNorm, gm.cej)) + geom_point() + facet_wrap(~tooth)

Figure 2: Explicit spatial modeling reveals independent effects of age and patient cohort on the spatial pattern of dental disease. Coefficients with uncertainty intervals are plotted for a series of per-tooth linear models where DMFS (a), CAL (b), and GM-CEJ (c) were regressed against age, patient cohort, UWS-FR and the interaction between UWS-FR and cohort.

#~~~~~~~~~~~~~~~~~~~~
fig2legend <- cowplot::get_legend(Fig2T)
Fig2 <- cowplot::plot_grid((Fig2A + theme(legend.position = "none")), #DMFS~Age
                            (Fig2E + theme(legend.position = "none")), #CAL-age                        
                            (Fig2M + theme(legend.position = "none")), #GM-CEJ
                            (Fig2Q + theme(legend.position = "none")),  #BOP
                             (Fig2I + theme(legend.position = "none")), 
                             (Fig2B + theme(legend.position = "none")), #DMFS~Cohort
                            (Fig2F + theme(legend.position = "none")), #CAL-cohort   
                            (Fig2N + theme(legend.position = "none")), #GM-CEJ
                            (Fig2R + theme(legend.position = "none")), #BOP
                           (Fig2J + theme(legend.position = "none")), 
                            (Fig2C + theme(legend.position = "none")), #DMFS~UWS-Fr
                            (Fig2G + theme(legend.position = "none")),#CAL
                            (Fig2O + theme(legend.position = "none")), #GM-CEJ
                            (Fig2S + theme(legend.position = "none")), #BOP
                            (Fig2K + theme(legend.position = "none")), 
                            (Fig2D + theme(legend.position = "none")), #DMFS~interaction
                            (Fig2H + theme(legend.position = "none")), #CAL   
                            (Fig2P + theme(legend.position = "none")), #GM-CEJ
                            (Fig2T + theme(legend.position = "none")),  #BOP
                            (Fig2L + theme(legend.position = "none")), 
                            ncol = 5, labels = c("a", "b", "c", "d",
                                               "e", "f", "g", "h",
                                               "i", "j", "k", "l",
                                               "m", "n", "o", "p",
                                               "q", "r", "s", "t"), hjust = -2.75, vjust = 1.5)
Figure2 <- cowplot::plot_grid(Fig2, fig2legend, nrow = 1, rel_widths = c(1,.3))
ggsave(Figure2, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure2.png",  width = 15, height = 10, units ="in",dpi = 300, device = "png")
Figure2

Figure 3: Relate the findings to the microbiome

Figure 3: Subgingival and supragingival communities from 3 control and 3 low flow subjects segregate by UWS-FR while age imperfectly separates supragingival communities. Principal coordinates analysis on bray Curtis dissimilarity segregated communities by UWS-FR (a) along the second coordinate which explained 12.8% of the variance. Samples clustered by age (b) for subgingival, but not supragingival sites.

library(wesanderson)
library(ggsci)

add_plot_elements <- function(x) {
      theme_update() +
        theme(plot.title = element_text(color="black", size=12),
        axis.title.x = element_text(color="black", size=12),
        axis.title.y = element_text(color="black", size=12),
        text = element_text(size=12),
        axis.text.x = element_text(angle=0, hjust=1),
        panel.spacing = unit(0, "lines"),
        strip.background = element_blank(),
        strip.placement = "outside") 
}
pal <- wes_palette("Zissou1", 10, type = "continuous")
pal2 = wes_palette("Royal1", 5, "continuous")
buda.pal <-wes_palette("Cavalcanti1", 10, "continuous")

#read in the microbial data and create a unified mapping file that includes the clinical data
clin2microbe = read.csv("~/Dropbox/oral-clinical-data-analysis-data/clin2microbemap.csv")
illumina = readRDS("~/Dropbox/oral-clinical-data-analysis-data/periodontology2000_hypo.RDS")
illumina = subset_samples(illumina, Protocol=="Clinic")
old.map = data.frame(sample_data(illumina))

new_map =merge(old.map, clin2microbe)
new_map$Duplication = duplicated(new_map$X.SampleID)
new_map = subset(new_map, Duplication !="TRUE")
rownames(new_map) = new_map$X.SampleID
sample_data(illumina) = sample_data(new_map)
sample_data(illumina)$DMFS = sample_data(illumina)$DMFS.bytooth
sample_data(illumina)$Habitat_Class = ifelse(sample_data(illumina)$Habitat_Class=="Supra", 
                                             "Supragingival", "Subgingival")

# use phyloseq to ordinate the phyloseq object
ord = ordinate(illumina, method="PCoA", distance="bray")
sample_data(illumina)$Cohort = ifelse(sample_data(illumina)$Aim=="SA1", "Control", "Low Flow")

p1 = plot_ordination(illumina, ord, type="samples", color="uwsfr", shape="Cohort") +
    scale_color_gradientn(colours = pal) +
    facet_wrap(~Habitat_Class, scales="free") + theme_classic() + add_plot_elements()

p2 = plot_ordination(illumina, ord, type="samples", color="age", shape="Cohort") + 
    scale_color_viridis_c() +
    facet_wrap(~Habitat_Class, scales="free")+ theme_classic() + add_plot_elements()


sample_data(illumina)$DMFS.bytooth = as.factor(sample_data(illumina)$DMFS.bytooth)
p3 = plot_ordination(illumina, ord, type="samples", color="DMFS.bytooth", shape="Cohort") + 
    scale_color_manual(values=buda.pal) +
    facet_wrap(~Habitat_Class, scales="free") + theme_classic()   + geom_point()+ add_plot_elements()


Figure3 <- cowplot::plot_grid((p1), 
                            (p2),
                            (p3), 
                          ncol = 1, labels = c("a", "b", "c"), hjust = -2.75, vjust = 1.5)
Figure3

ggsave(Figure3, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure3.png",  width = 8, height = 10, units ="in",dpi = 300, device = "png")

how does this related to spatial patterning

ordDf = p2$data %>%
  subset(., Axis.2 > 0) %>%
  subset(., Aim=="SA1")


modOut = p2$data %>%
  ggplot(., aes(Tooth_Class, Axis.1)) + facet_wrap(Cohort~Habitat_Class, scales="free_x") +  geom_boxplot()+
  stat_compare_means() +geom_point() 

modOut

ggsave(modOut, file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS3.png")

Supplementary Figure 1: Constrained correspondence analysis

Reference: http://userweb.eng.gla.ac.uk/umer.ijaz/bioinformatics/ggplot2/CCA.R

library(ggvegan)
buda.pal <-wes_palette("Cavalcanti1", 10, "continuous")
#set an analysis up for CCA
phyCLR= transform_sample_counts(illumina, function(x) x/sum(x))
#phyCLR = transform_sample_counts(illumina, function(x) compositions::clr(x))
map1 = data.frame(sample_data(phyCLR)$Tooth_Class, sample_data(phyCLR)$uwsfr, 
                  sample_data(phyCLR)$DMFS.bytooth,
                  sample_data(phyCLR)$subject.tooth.mean.pd, sample_data(phyCLR)$subject.tooth.mean.cal,
                  sample_data(phyCLR)$subject.tooth.mean.gm.cej, sample_data(phyCLR)$age )
Z=data.frame(sample_data(phyCLR)$Subject)
colnames(map1) = c("Class", "uws_fr", "dmfs", "pd", "cal", "gmcej", "age")
map1$Class = as.numeric(map1$Class)
subs = as.vector(sample_data(phyCLR)$Subject)
runs = as.vector(sample_data(phyCLR)$Pool_Name)
map1$uws_fr = abs(map1$uws_fr)

otus = as.matrix(otu_table(phyCLR))
#for some reason this wworks on tsp = tax_glom(phy, "Genus")

attach(map)
ord = cca(otus ~ map1$uws_fr+map1$dmfs+map1$pd+map1$cal+map1$gmcej+map1$age, Z=Z, Ssitenv=map1) # add 
ord.out<-scores(ord,display=c("sp","wa","lc","bp","cn"))


#Extract site data first
df_sites<-data.frame(ord.out$sites,data.frame(sample_data(phyCLR)))

#Draw sites
p<-ggplot()
p<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort), size=2) + 
  theme_classic()

#Draw biplots
mupltiplier <- vegan:::ordiArrowMul(ord.out$biplot)


df_arrows<- ord.out$biplot*2
colnames(df_arrows)<-c("x","y")
df_arrows=as.data.frame(df_arrows)
foo = reshape2::colsplit(rownames(df_arrows), "([$])", c("junk", "variable"))
foo = plyr::revalue(as.factor(foo$variable), c("uws_fr"="UWS-FR",
                     "dmfs1"="DMFS1",
                     "dmfs2"="DMS2",
                     "pd"="PD",
                     "cal"="CAL",
                     "gmcej"="GM-CEJ"))

rownames(df_arrows) = foo

p<-p+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm")))

p<-p+geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows)))


# Draw species
df_species<- as.data.frame(ord.out$species)
colnames(df_species)<-c("x","y")
df_species$ASV = rownames(df_species)
tax  = data.frame(phyCLR@tax_table@.Data, ASV=rownames(phyCLR@tax_table@.Data), Abundance=taxa_sums(phyCLR))
dfTax = plyr::join(tax, df_species)
dfTax <- dfTax[order(dfTax$Abundance, decreasing=TRUE), ] 
dfTax$ASV_Number = paste0("ASV", 1:nrow(dfTax))
dfTax = dfTax[1:15,]

# Either choose text or points
p<-p+geom_point(data=dfTax,aes(x,y, color=Genus), size=3)+ 
    scale_color_manual(values=c("#FFDEE6","#03919B",
                                "#FE9866", "#BCEAF6", "#B4CF68",
                                "#0A5947","#FCCEB2","#C17A2A","#EC5064"))
Fig3A<-p+geom_text(data=dfTax,aes(x,y, label=ASV_Number))
p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=DMFS.bytooth), size=2) + 
  theme_classic()
Fig3B<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm"))) +
      geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
    guides(colour=guide_legend(title="DMFS"))

p = ggplot()
Fig3C<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=subject.tooth.mean.cal), size=2) + 
  theme_classic() + 
  scale_color_gradientn(colours = pal) +
  geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm"))) +
      geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +     
      guides(colour=guide_legend(title="Mean CAL"))


p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=Habitat_Class), size=2) + 
  theme_classic()
Fig3D<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm"))) +
      geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) 


p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=Subject), size=2) + 
  theme_classic()
Fig3E<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm"))) +
      geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
    guides(colour=guide_legend(title="DMFS"))


p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=uwsfr), size=2) + 
  theme_classic()
Fig3F<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm"))) +
      geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) +
  scale_color_viridis()


p = ggplot()
p1<-p+geom_point(data=df_sites,aes(x=CCA1,y=CCA2,shape=Cohort, color=DMFS.bytooth ), size=2) + 
  theme_classic()
Fig3G<-p1+geom_segment(data=df_arrows, aes(x = 0, y = 0, xend = x, yend = y),
                 arrow = arrow(length = unit(0.2, "cm"))) +
      geom_text(data=as.data.frame(df_arrows*1.1),aes(x, y, label = rownames(df_arrows))) 


grid.arrange(Fig3B, 
             Fig3E,
             Fig3C, 
             Fig3A, 
             Fig3F, 
             Fig3G,ncol=2)

ggsave(
grid.arrange(Fig3B, 
             Fig3E,
             Fig3C, 
             Fig3A,  ncol=2), file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS4.png", width = 14, height = 10)

library(seqRFLP)
x = data.frame(select(dfTax, c("ASV_Number", "ASV")))
foo=dataframe2fas(x, file = "~/Dropbox/oral-clinical-data-analysis/top30.fasta")

Supplementary Figure 2: different distance metrics

library(vegan)
###do relative ab?undance transformation and see if  the patterns are the same
illuminaRA  = transform_sample_counts(illumina, function(x) x / sum(x) )
#replace the otu table with hellinger transformed data
otus = data.frame(otu_table(illuminaRA))
outs.h = decostand(otus, "hellinger")
otu_table(illuminaRA) = otu_table(outs.h, taxa_are_rows=FALSE)

#make a list of nmds objects
dist_methods_deco =  c("binomial", "bray", "canberra", "euclidean", "gower", "jaccard", "kulczynski", "manhattan")

nmds_ideco.list <- vector("list", length(dist_methods_deco))
plot_ideco.list <- vector("list", length(dist_methods_deco))     

for(i in 1:length(dist_methods_deco)){
        iDist <- vegan::vegdist(outs.h, method=dist_methods_deco[i])
        iMDS <- ordinate(illuminaRA, method="PCoA", distance=iDist, correction="lingoes")
        p <- plot_ordination(illuminaRA, iMDS, color="Tooth_Class") + facet_wrap(Jaw_Quadrant~Tooth_Aspect)
        ps <- p$data
        ps$Distance <- dist_methods_deco[[i]]
        plot_ideco.list[[i]] <- ps
        nmds_ideco.list[[i]] <- iMDS
}
plot_ideco <- do.call("rbind", plot_ideco.list) 
p1=ggplot(plot_ideco, aes(Axis.1, Axis.2, color=uwsfr)) + 
    facet_wrap(Distance~Habitat_Class, scales="free", ncol=2) + geom_point() + theme_bw()+ 
    scale_color_gradientn(colours = pal) + ggtitle("UWSFR")

p2=ggplot(plot_ideco, aes(Axis.1, Axis.2, color=age)) + 
    facet_wrap(Distance~Habitat_Class, scales="free", ncol=2) + geom_point() + theme_bw()+ 
    ggtitle("Age") + scale_color_viridis_c()

p3=ggplot(plot_ideco, aes(Axis.1, Axis.2, color=DMFS.bytooth)) + 
    facet_wrap(Distance~Habitat_Class, scales="free", ncol=2) + geom_point() + theme_bw()+ 
    ggtitle("DMFS")+scale_color_manual(values=buda.pal)


grid.arrange(p1, p2, p3, ncol=3)

ggsave(grid.arrange(p1, p2, p3, ncol=3), file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS2.png",  width = 12, height = 14, units ="in",dpi = 300)

Microbial migration - refer to supplementary data 3.

Figure 5A: VAS

This plot shows that dry_speak, dry_qol, dry_swallow are correlated together, dry_tongue, dry_throat, dry_palate are together, and dry_lips is alone. This suggests that these grouped variables vary together. These plots also show that PC1 vs PC2 separates the data primarily by cohort. This suggests that these variables may be used to possibly distinguish between cohort.

quest.df <- read.csv("~/Dropbox/oral-clinical-data-analysis-data/dry_mouth_questionnaire_20190915.csv")
#add cohort variable and select only the VAS varaibles for input to a PCoA
quest.df$cohort <- ifelse(quest.df$aim == 3, "Sjögren's", "Control")
pca_in <- quest.df %>%
  dplyr::select(., phq_dry_swallow:phq_dry_palate) 
quest.df$cat = ifelse(quest.df$uwsfr < 0.1, "Lower", "Higher")

## Plot the PCoA
library("FactoMineR")
res.pca <- PCA(pca_in, graph = FALSE)
Figure5A = fviz_pca_biplot(res.pca, 
                col.ind = quest.df$cat, palette = "jco", 
                addEllipses = FALSE, label = "var",
                col.var = "black", repel = FALSE,
                legend.title = "Flow Rate Category") 

ggsave(Figure5A, file="~/Dropbox/oral-clinical-data-analysis-ms/Figure5a.png", device="png", height = 4, width = 8)

Supplementary figure S4: raw data underlying VSQ

# plot the raw data
vsq = melt(quest.df, id.vars = c("redcap", "aim", "uwsfr", "cohort", "X"))
vsq$value = as.numeric(as.character(vsq$value))
vsq = subset(vsq, variable != "cat")
FigureS4 = ggplot(vsq, aes(cohort, value, fill=cohort)) + 
    geom_boxplot() + facet_wrap(~variable, ncol=4) + ylab("VAS Rating (0-100)") + theme_classic()+
    stat_compare_means()
FigureS4

ggsave(FigureS4, file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS4.png", device="png", height = 6, width = 8)

Figure 5B: Symptomatic complaints of dry mouth predict low salivary flow.

Conditional inference tree was used to identify the variables that most robustly separated patients into groups based on low (UWS-FR < 0.1ml/min) and not low (UWS-FR > 0.1 ml/min) subgroups. This model accurately identified 84.6% (11/13) subjects as having salivary flow rates lower than 0.1 ml/min. Most subjects who indicated that low salivary flow negatively impacted their overall quality of life (VAS >=56) had flow rates of less than 0.1 ml/min. On the other hand, most subjects who rated a negligible impact of low flow on their quality of life as well as an absence of dryness on their lips had flow rates exceeding 0.1 ml/min.

library(rpart)
library(rpart.plot)
# CART model

latlontree = rpart(uwsfr ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate, data=quest.df, method="anova")

quest.df$cohort = as.factor(as.character(quest.df$cohort))
latlontree = rpart(cohort ~ phq_dry_swallow + uwsfr, data=quest.df)


library(party)
quest.df$cat = ifelse(quest.df$uwsfr < 0.1, "low", "not.low")
quest.df$cat   = as.factor(quest.df$cat)
fit <- ctree(cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate + uwsfr,
             data=quest.df)


### extract terminal node ID, two ways
all.equal(predict(fit, type = "node"), where(fit))
## [1] TRUE
#get classification
table(predict(fit), quest.df$cat)
##          
##           low not.low
##   low      11       2
##   not.low   6     111
#estimated class probabilities
tr <- treeresponse(fit, newdata = quest.df)
tr = do.call(rbind, tr) 
tr
##             [,1]      [,2]
##   [1,] 0.0200000 0.9800000
##   [2,] 0.0200000 0.9800000
##   [3,] 0.0200000 0.9800000
##   [4,] 0.0200000 0.9800000
##   [5,] 0.0200000 0.9800000
##   [6,] 0.0200000 0.9800000
##   [7,] 0.0200000 0.9800000
##   [8,] 0.8461538 0.1538462
##   [9,] 0.0200000 0.9800000
##  [10,] 0.0200000 0.9800000
##  [11,] 0.2352941 0.7647059
##  [12,] 0.0200000 0.9800000
##  [13,] 0.0200000 0.9800000
##  [14,] 0.2352941 0.7647059
##  [15,] 0.0200000 0.9800000
##  [16,] 0.2352941 0.7647059
##  [17,] 0.0200000 0.9800000
##  [18,] 0.2352941 0.7647059
##  [19,] 0.0200000 0.9800000
##  [20,] 0.8461538 0.1538462
##  [21,] 0.2352941 0.7647059
##  [22,] 0.2352941 0.7647059
##  [23,] 0.2352941 0.7647059
##  [24,] 0.8461538 0.1538462
##  [25,] 0.8461538 0.1538462
##  [26,] 0.2352941 0.7647059
##  [27,] 0.0200000 0.9800000
##  [28,] 0.0200000 0.9800000
##  [29,] 0.8461538 0.1538462
##  [30,] 0.0200000 0.9800000
##  [31,] 0.0200000 0.9800000
##  [32,] 0.2352941 0.7647059
##  [33,] 0.8461538 0.1538462
##  [34,] 0.8461538 0.1538462
##  [35,] 0.0200000 0.9800000
##  [36,] 0.8461538 0.1538462
##  [37,] 0.0200000 0.9800000
##  [38,] 0.0200000 0.9800000
##  [39,] 0.0200000 0.9800000
##  [40,] 0.0200000 0.9800000
##  [41,] 0.8461538 0.1538462
##  [42,] 0.2352941 0.7647059
##  [43,] 0.0200000 0.9800000
##  [44,] 0.0200000 0.9800000
##  [45,] 0.0200000 0.9800000
##  [46,] 0.8461538 0.1538462
##  [47,] 0.8461538 0.1538462
##  [48,] 0.2352941 0.7647059
##  [49,] 0.0200000 0.9800000
##  [50,] 0.0200000 0.9800000
##  [51,] 0.0200000 0.9800000
##  [52,] 0.0200000 0.9800000
##  [53,] 0.0200000 0.9800000
##  [54,] 0.0200000 0.9800000
##  [55,] 0.0200000 0.9800000
##  [56,] 0.0200000 0.9800000
##  [57,] 0.0200000 0.9800000
##  [58,] 0.0200000 0.9800000
##  [59,] 0.0200000 0.9800000
##  [60,] 0.0200000 0.9800000
##  [61,] 0.0200000 0.9800000
##  [62,] 0.0200000 0.9800000
##  [63,] 0.2352941 0.7647059
##  [64,] 0.0200000 0.9800000
##  [65,] 0.0200000 0.9800000
##  [66,] 0.0200000 0.9800000
##  [67,] 0.0200000 0.9800000
##  [68,] 0.0200000 0.9800000
##  [69,] 0.0200000 0.9800000
##  [70,] 0.2352941 0.7647059
##  [71,] 0.0200000 0.9800000
##  [72,] 0.0200000 0.9800000
##  [73,] 0.0200000 0.9800000
##  [74,] 0.0200000 0.9800000
##  [75,] 0.0200000 0.9800000
##  [76,] 0.2352941 0.7647059
##  [77,] 0.0200000 0.9800000
##  [78,] 0.0200000 0.9800000
##  [79,] 0.0200000 0.9800000
##  [80,] 0.0200000 0.9800000
##  [81,] 0.0200000 0.9800000
##  [82,] 0.0200000 0.9800000
##  [83,] 0.0200000 0.9800000
##  [84,] 0.0200000 0.9800000
##  [85,] 0.0200000 0.9800000
##  [86,] 0.0200000 0.9800000
##  [87,] 0.0200000 0.9800000
##  [88,] 0.0200000 0.9800000
##  [89,] 0.0200000 0.9800000
##  [90,] 0.0200000 0.9800000
##  [91,] 0.2352941 0.7647059
##  [92,] 0.0200000 0.9800000
##  [93,] 0.0200000 0.9800000
##  [94,] 0.0200000 0.9800000
##  [95,] 0.0200000 0.9800000
##  [96,] 0.0200000 0.9800000
##  [97,] 0.0200000 0.9800000
##  [98,] 0.2352941 0.7647059
##  [99,] 0.0200000 0.9800000
## [100,] 0.0200000 0.9800000
## [101,] 0.0200000 0.9800000
## [102,] 0.0200000 0.9800000
## [103,] 0.0200000 0.9800000
## [104,] 0.8461538 0.1538462
## [105,] 0.2352941 0.7647059
## [106,] 0.0200000 0.9800000
## [107,] 0.0200000 0.9800000
## [108,] 0.0200000 0.9800000
## [109,] 0.0200000 0.9800000
## [110,] 0.0200000 0.9800000
## [111,] 0.0200000 0.9800000
## [112,] 0.0200000 0.9800000
## [113,] 0.0200000 0.9800000
## [114,] 0.0200000 0.9800000
## [115,] 0.0200000 0.9800000
## [116,] 0.0200000 0.9800000
## [117,] 0.0200000 0.9800000
## [118,] 0.0200000 0.9800000
## [119,] 0.0200000 0.9800000
## [120,] 0.0200000 0.9800000
## [121,] 0.0200000 0.9800000
## [122,] 0.0200000 0.9800000
## [123,] 0.0200000 0.9800000
## [124,] 0.0200000 0.9800000
## [125,] 0.0200000 0.9800000
## [126,] 0.0200000 0.9800000
## [127,] 0.0200000 0.9800000
## [128,] 0.0200000 0.9800000
## [129,] 0.8461538 0.1538462
## [130,] 0.0200000 0.9800000
prob = data.frame(tr, quest.df)
prob
##            X1        X2    X redcap aim phq_dry_swallow phq_dry_qol
## 1   0.0200000 0.9800000    1     17   1               0           0
## 2   0.0200000 0.9800000   29     18   1               0           0
## 3   0.0200000 0.9800000   57     23   1              19          16
## 4   0.0200000 0.9800000   81     36   1               1           1
## 5   0.0200000 0.9800000  109     46   1              29          18
## 6   0.0200000 0.9800000  137     49   1               0           1
## 7   0.0200000 0.9800000  161     63   1               0           0
## 8   0.8461538 0.1538462  193     66   3              99         100
## 9   0.0200000 0.9800000  221    136   1               0           0
## 10  0.0200000 0.9800000  249    143   1               0           0
## 11  0.2352941 0.7647059  277    170   1              50          50
## 12  0.0200000 0.9800000  305    171   1               2           0
## 13  0.0200000 0.9800000  333    175   1               5           5
## 14  0.2352941 0.7647059  361    178   3              31          29
## 15  0.0200000 0.9800000  389    181   1               3           0
## 16  0.2352941 0.7647059  417    183   1              50          50
## 17  0.0200000 0.9800000  445    190   1               0           3
## 18  0.2352941 0.7647059  473    200   3              35          30
## 19  0.0200000 0.9800000  501    202   1               1           0
## 20  0.8461538 0.1538462  529    218   3              65          65
## 21  0.2352941 0.7647059  557    219   3              50          50
## 22  0.2352941 0.7647059  585    221   3              50          50
## 23  0.2352941 0.7647059  611    226   3              50          50
## 24  0.8461538 0.1538462  635    236   3              76          71
## 25  0.8461538 0.1538462  666    239   3             100         100
## 26  0.2352941 0.7647059  694    247   3              50          50
## 27  0.0200000 0.9800000  721    248   3              40          48
## 28  0.0200000 0.9800000  749    254   1               7           5
## 29  0.8461538 0.1538462  775    266   3              75          65
## 30  0.0200000 0.9800000  799    270   2               6          11
## 31  0.0200000 0.9800000  827    274   1               0           1
## 32  0.2352941 0.7647059  849    277   1              50          50
## 33  0.8461538 0.1538462  877    279   3              51          90
## 34  0.8461538 0.1538462  906    280   3              30          58
## 35  0.0200000 0.9800000  931    281   2               0           0
## 36  0.8461538 0.1538462  959    282   3              90          90
## 37  0.0200000 0.9800000  985    283   3              15           5
## 38  0.0200000 0.9800000 1013    288   2               2           0
## 39  0.0200000 0.9800000 1037    289   1               0           0
## 40  0.0200000 0.9800000 1065    294   1               0           0
## 41  0.8461538 0.1538462 1094    295   3              71          73
## 42  0.2352941 0.7647059 1124    297   3              80          47
## 43  0.0200000 0.9800000 1148    298   1               4           2
## 44  0.0200000 0.9800000 1176    299   1               0           0
## 45  0.0200000 0.9800000 1208    300   1               1           3
## 46  0.8461538 0.1538462 1236    303   3              73          64
## 47  0.8461538 0.1538462 1257    307   3              97          98
## 48  0.2352941 0.7647059 1284    308   1              15          14
## 49  0.0200000 0.9800000 1312    309   1               2           0
## 50  0.0200000 0.9800000 1338    310   1               3           2
## 51  0.0200000 0.9800000 1366    313   1               3           3
## 52  0.0200000 0.9800000 1394    314   1               0           0
## 53  0.0200000 0.9800000 1422    315   2               3           3
## 54  0.0200000 0.9800000 1450    317   1               0           0
## 55  0.0200000 0.9800000 1478    318   1              25           0
## 56  0.0200000 0.9800000 1506    319   1              10           0
## 57  0.0200000 0.9800000 1530    321   1              15           0
## 58  0.0200000 0.9800000 1558    323   2               0           0
## 59  0.0200000 0.9800000 1584    324   1               3           1
## 60  0.0200000 0.9800000 1612    328   2               2           3
## 61  0.0200000 0.9800000 1642    329   2               1           0
## 62  0.0200000 0.9800000 1671    330   1               8          12
## 63  0.2352941 0.7647059 1703    331   3              43          56
## 64  0.0200000 0.9800000 1730    336   2              60           7
## 65  0.0200000 0.9800000 1758    337   1               8          10
## 66  0.0200000 0.9800000 1786    339   2               0           2
## 67  0.0200000 0.9800000 1818    340   1               1           3
## 68  0.0200000 0.9800000 1846    341   2               3           0
## 69  0.0200000 0.9800000 1873    342   2              14           7
## 70  0.2352941 0.7647059 1901    343   2              50          50
## 71  0.0200000 0.9800000 1929    344   2               0           2
## 72  0.0200000 0.9800000 1960    345   2               0           0
## 73  0.0200000 0.9800000 1989    346   2              20          24
## 74  0.0200000 0.9800000 2019    347   1               0           0
## 75  0.0200000 0.9800000 2047    348   1               0           2
## 76  0.2352941 0.7647059 2072    350   1               1          49
## 77  0.0200000 0.9800000 2100    352   2               1           0
## 78  0.0200000 0.9800000 2128    353   1               0           0
## 79  0.0200000 0.9800000 2156    354   2               0           0
## 80  0.0200000 0.9800000 2184    356   1               0           0
## 81  0.0200000 0.9800000 2212    358   2               0           0
## 82  0.0200000 0.9800000 2244    359   2               0           0
## 83  0.0200000 0.9800000 2272    361   1               0           0
## 84  0.0200000 0.9800000 2300    366   1               0           0
## 85  0.0200000 0.9800000 2328    367   2               0           0
## 86  0.0200000 0.9800000 2356    369   2               0           0
## 87  0.0200000 0.9800000 2384    371   2              12           6
## 88  0.0200000 0.9800000 2412    372   2              51           0
## 89  0.0200000 0.9800000 2440    376   1               0           0
## 90  0.0200000 0.9800000 2466    377   2               0           1
## 91  0.2352941 0.7647059 2492    378   3              74          10
## 92  0.0200000 0.9800000 2520    379   1               1           3
## 93  0.0200000 0.9800000 2547    380   1               5           0
## 94  0.0200000 0.9800000 2578    381   1               0           0
## 95  0.0200000 0.9800000 2602    382   1               7           4
## 96  0.0200000 0.9800000 2631    383   1               0           0
## 97  0.0200000 0.9800000 2657    384   1               0           0
## 98  0.2352941 0.7647059 2685    385   3              49          24
## 99  0.0200000 0.9800000 2708    386   1               3           2
## 100 0.0200000 0.9800000 2736    387   1               0           0
## 101 0.0200000 0.9800000 2764    389   1               0           0
## 102 0.0200000 0.9800000 2790    390   1               0           0
## 103 0.0200000 0.9800000 2818    391   1               0           0
## 104 0.8461538 0.1538462 2850    393   3              70          78
## 105 0.2352941 0.7647059 2876    395   1               0           0
## 106 0.0200000 0.9800000 2904    396   1               1           0
## 107 0.0200000 0.9800000 2936    398   1               0           0
## 108 0.0200000 0.9800000 2964    402   1              15           7
## 109 0.0200000 0.9800000 2992    404   1               2           0
## 110 0.0200000 0.9800000 3020    405   1               0           0
## 111 0.0200000 0.9800000 3048    412   1               0           0
## 112 0.0200000 0.9800000 3078    413   1               3           0
## 113 0.0200000 0.9800000 3109    415   1               0           0
## 114 0.0200000 0.9800000 3137    416   1               0           0
## 115 0.0200000 0.9800000 3163    418   1               0           1
## 116 0.0200000 0.9800000 3189    419   2               0           0
## 117 0.0200000 0.9800000 3217    420   2               0           0
## 118 0.0200000 0.9800000 3246    421   1               0           0
## 119 0.0200000 0.9800000 3272    422   2               0           0
## 120 0.0200000 0.9800000 3299    423   2               0           0
## 121 0.0200000 0.9800000 3326    427   1               0           0
## 122 0.0200000 0.9800000 3354    428   1               0           0
## 123 0.0200000 0.9800000 3382    429   2               0           0
## 124 0.0200000 0.9800000 3407    430   1               4           0
## 125 0.0200000 0.9800000 3435    433   1               1           4
## 126 0.0200000 0.9800000 3463    435   2              50          50
## 127 0.0200000 0.9800000 3493    436   2               8           0
## 128 0.0200000 0.9800000 3524    437   2               0           0
## 129 0.8461538 0.1538462 3552    438   2              71          71
## 130 0.0200000 0.9800000 3580    439   1               5           3
##     phq_dry_speak phq_dry_throat phq_dry_tongue phq_dry_lips phq_dry_palate
## 1               0              0              0            8              5
## 2               0              5              0           10              0
## 3              16             25             22           35             29
## 4               1              1              1            3              1
## 5              29             48             28           56             62
## 6               0              0              0            0              0
## 7               0              0              0           27              0
## 8              99             75             90           95             95
## 9               0              0              0           10              0
## 10              0              0              0            0              0
## 11             50             50             50           50             50
## 12              0              0              0            0              0
## 13              4              5              4            4              5
## 14             17             21             36           50             33
## 15              0              5              7           57             16
## 16             50             50             50           50             50
## 17              2              4              2           42             44
## 18             40             55             40           50             55
## 19              0              0              0           22              9
## 20             75             75             75           75             75
## 21             50              1             71           73             50
## 22             50             50             50           50             50
## 23             50             50             50           50             50
## 24             47             69             75           82             80
## 25             86             95            100           93            100
## 26             50             95             95          100             95
## 27             51             67              0           67             69
## 28              8              8              9           16             10
## 29             60             80             60           85             55
## 30              5             19              2           15             13
## 31              0              0              0           16              0
## 32             50             50             50           50             50
## 33             85             50             51          100             51
## 34             58             80             80           80             78
## 35              0              0              0            7             10
## 36             95             15             90           85             90
## 37             25             11              4           66             16
## 38              3              2              2            2              2
## 39              0              0              0            0              0
## 40              0              0              0           10              0
## 41             73             50             48           71             53
## 42             75             86             72           61             70
## 43              2              4              4            3              1
## 44              0              0              0            0              0
## 45              1              1              0            1              2
## 46             73             64             82           88             76
## 47             99             50             51           50             52
## 48              0             49             49           62             49
## 49              0              0              0           24              0
## 50              0              3              0            0              1
## 51              6              6              6            7              1
## 52              0              0              0           23              0
## 53              0              2              1            2              0
## 54              0              0              0           24              0
## 55             25              0              0           25              0
## 56              2             22             12           24             10
## 57             15              0              0           15              2
## 58              0              0              0            3              0
## 59              3              0              0           31              3
## 60              2              4              2            3              2
## 61              0              6             10           34              3
## 62              6              9             10           28              7
## 63             55             51             46           61             49
## 64              7             20             20           52             33
## 65              6             12              8           53             11
## 66              3              2              0            0              0
## 67              3              3             11            4              6
## 68              0              0              0            0              0
## 69              2              6              7           11              9
## 70             50             50             50           50              1
## 71              1              2              2            3              1
## 72              0              0              0           11              0
## 73             19              6             15           49             50
## 74              0              0              0            0              0
## 75              1              0              0            5              0
## 76             50             48             50           49             50
## 77              0             16              3           10              0
## 78              0              2              0            0              0
## 79              0              0              0            0              0
## 80              3              3              0            0              0
## 81              0             17             15           21              8
## 82              0             20              0           34              0
## 83              0             16             11           29             26
## 84              0              0              0            1              0
## 85              8              0              0           22              0
## 86              0              0              0           10              0
## 87             22             16              0           13              3
## 88              6              6              7           18              6
## 89              0             26              1           27              0
## 90             50              0              0           28              0
## 91              0             65             61           77             73
## 92             27             27             24            4              4
## 93              0              2              0            0              0
## 94              0              0              0            5              0
## 95              6              3              9            7              6
## 96              0              0              0           49              0
## 97              0              0              0           25              0
## 98             24             51             49           49             50
## 99              0              0              0            0              0
## 100             0              0              0            0              0
## 101             0              0              0           11              0
## 102             0              0             11           26              0
## 103             0              0              0            0              0
## 104            85             80             82           83             85
## 105             0             50             50           50             50
## 106             0              0              0            0              0
## 107             0              0              0           71              0
## 108             8              6              8           25              4
## 109             2              0              3            8              0
## 110             0              2              3           50              1
## 111             0              0              0           50              0
## 112             1              0              0           24              0
## 113             0              0              0           11              0
## 114             0              0              0            0              0
## 115            21             26             18           49              7
## 116             0              0              0            0              0
## 117             0             13             15           29             29
## 118             0              0              0           20              0
## 119             0             10              0           20              3
## 120             0              0              1            2              3
## 121             0              0              0            0              0
## 122             0              0             20           66             22
## 123             1              0              0            0              0
## 124             0              0              1            0              1
## 125             3              3              1           49              3
## 126            50              8              4            8              7
## 127             0              0              0            8              0
## 128             0              0              0            0              0
## 129            67             68             78           91             62
## 130             6              3              5           22              4
##     uwsfr    cohort     cat
## 1   0.611   Control not.low
## 2   0.410   Control not.low
## 3   0.301   Control not.low
## 4   0.511   Control not.low
## 5   0.334   Control not.low
## 6   0.562   Control not.low
## 7   0.422   Control not.low
## 8   0.024 Sjögren's     low
## 9   1.107   Control not.low
## 10  0.502   Control not.low
## 11  0.789   Control not.low
## 12  0.745   Control not.low
## 13  0.311   Control not.low
## 14  0.000 Sjögren's     low
## 15  0.751   Control not.low
## 16  0.413   Control not.low
## 17  0.119   Control not.low
## 18  0.232 Sjögren's not.low
## 19  0.842   Control not.low
## 20  0.676 Sjögren's not.low
## 21  0.000 Sjögren's     low
## 22  0.077 Sjögren's     low
## 23  0.114 Sjögren's not.low
## 24  0.083 Sjögren's     low
## 25  0.000 Sjögren's     low
## 26  0.056 Sjögren's     low
## 27  0.142 Sjögren's not.low
## 28  0.593   Control not.low
## 29  0.050 Sjögren's     low
## 30  0.721   Control not.low
## 31  0.340   Control not.low
## 32  0.391   Control not.low
## 33  0.065 Sjögren's     low
## 34  0.000 Sjögren's     low
## 35  0.252   Control not.low
## 36  0.010 Sjögren's     low
## 37  0.492 Sjögren's not.low
## 38  0.187   Control not.low
## 39  0.340   Control not.low
## 40  0.364   Control not.low
## 41  0.000 Sjögren's     low
## 42  0.137 Sjögren's not.low
## 43  0.383   Control not.low
## 44  1.796   Control not.low
## 45  1.065   Control not.low
## 46  0.001 Sjögren's     low
## 47  0.052 Sjögren's     low
## 48  0.193   Control not.low
## 49  0.684   Control not.low
## 50  0.314   Control not.low
## 51  0.341   Control not.low
## 52  0.852   Control not.low
## 53  0.356   Control not.low
## 54  0.414   Control not.low
## 55  0.409   Control not.low
## 56  0.106   Control not.low
## 57  0.318   Control not.low
## 58  0.115   Control not.low
## 59  0.097   Control     low
## 60  0.234   Control not.low
## 61  0.383   Control not.low
## 62  0.250   Control not.low
## 63  0.261 Sjögren's not.low
## 64  0.368   Control not.low
## 65  0.412   Control not.low
## 66  0.294   Control not.low
## 67  0.247   Control not.low
## 68  0.386   Control not.low
## 69  0.260   Control not.low
## 70  0.148   Control not.low
## 71  0.193   Control not.low
## 72  0.605   Control not.low
## 73  0.028   Control     low
## 74  0.420   Control not.low
## 75  0.549   Control not.low
## 76  0.705   Control not.low
## 77  0.708   Control not.low
## 78  0.181   Control not.low
## 79  1.108   Control not.low
## 80  0.268   Control not.low
## 81  0.614   Control not.low
## 82  0.809   Control not.low
## 83  0.438   Control not.low
## 84  0.189   Control not.low
## 85  0.399   Control not.low
## 86  0.738   Control not.low
## 87  0.270   Control not.low
## 88  0.178   Control not.low
## 89  0.475   Control not.low
## 90  0.464   Control not.low
## 91  0.292 Sjögren's not.low
## 92  0.423   Control not.low
## 93  0.399   Control not.low
## 94  0.579   Control not.low
## 95  0.199   Control not.low
## 96  0.517   Control not.low
## 97  0.264   Control not.low
## 98  0.150 Sjögren's not.low
## 99  0.709   Control not.low
## 100 0.263   Control not.low
## 101 0.559   Control not.low
## 102 0.435   Control not.low
## 103 0.326   Control not.low
## 104 0.017 Sjögren's     low
## 105 0.232   Control not.low
## 106 0.198   Control not.low
## 107 0.756   Control not.low
## 108 0.313   Control not.low
## 109 0.180   Control not.low
## 110 0.475   Control not.low
## 111 1.156   Control not.low
## 112 0.698   Control not.low
## 113 0.757   Control not.low
## 114 0.442   Control not.low
## 115 0.316   Control not.low
## 116 0.127   Control not.low
## 117 0.507   Control not.low
## 118 0.470   Control not.low
## 119 0.404   Control not.low
## 120 0.412   Control not.low
## 121 0.677   Control not.low
## 122 0.283   Control not.low
## 123 0.242   Control not.low
## 124 0.267   Control not.low
## 125 0.660   Control not.low
## 126 0.691   Control not.low
## 127 0.183   Control not.low
## 128 0.427   Control not.low
## 129 0.273   Control not.low
## 130 0.539   Control not.low
model <- train(cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + 
                 phq_dry_tongue + phq_dry_lips + phq_dry_palate + uwsfr, 
               data = quest.df, method='ctree', tuneLength=5,
               trControl=trainControl(
                 method='cv', number=3, classProbs=TRUE, summaryFunction=twoClassSummary))


png("~/Dropbox/oral-clinical-data-analysis-ms/Figure5b.png", res=80, height=800, width=800) 
plot(fit)
dev.off()    
## quartz_off_screen 
##                 2
library(caret)
set.seed(3456)
trainIndex <- createDataPartition(quest.df$cat, p = .7,
                                  list = FALSE,
                                  times = 1)
train <- quest.df[ trainIndex,]
test <- quest.df[-trainIndex,]

library(caret)

tmodel = ctree(formula=cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + 
                 phq_dry_tongue + phq_dry_lips + phq_dry_palate + uwsfr, 
               data = train)
print(tmodel)
## 
##   Conditional inference tree with 3 terminal nodes
## 
## Response:  cat 
## Inputs:  phq_dry_swallow, phq_dry_qol, phq_dry_speak, phq_dry_throat, phq_dry_tongue, phq_dry_lips, phq_dry_palate, uwsfr 
## Number of observations:  92 
## 
## 1) phq_dry_qol <= 50; criterion = 1, statistic = 49.092
##   2) phq_dry_lips <= 28; criterion = 0.996, statistic = 12.24
##     3)*  weights = 60 
##   2) phq_dry_lips > 28
##     4)*  weights = 24 
## 1) phq_dry_qol > 50
##   5)*  weights = 8
plot(tmodel)

pred = predict(tmodel, test)
foo = data.frame(pred, test)
table(foo$cat, foo$pred)
##          
##           low not.low
##   low       3       2
##   not.low   3      30

Supplementary Figure 5: Random Forest analysis of VAS

Supplementary Figure 5: Random forest analysis also identifies dryness of lips and impacts on quality of life as the most discriminant features. Random forest analysis was used to identify the most discriminant features included in the Visual Analog Scale. The out of box error rate for the random forest model was 8.46%. The most discriminant features were the impact of dry mouth on the overall quality of life (phq_dry_qol), the dryness of the lips (phq_dry_lips) consistent with the classification tree.

library(randomForest)
library(rfPermute)
fit <- randomForest(cat ~ phq_dry_swallow + phq_dry_qol + phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips + phq_dry_palate + cohort, data=quest.df, proximity = TRUE)
print(fit) # view results
## 
## Call:
##  randomForest(formula = cat ~ phq_dry_swallow + phq_dry_qol +      phq_dry_speak + phq_dry_throat + phq_dry_tongue + phq_dry_lips +      phq_dry_palate + cohort, data = quest.df, proximity = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 8.46%
## Confusion matrix:
##         low not.low class.error
## low      11       6  0.35294118
## not.low   5     108  0.04424779
importance(fit) # importance of each predictor
##                 MeanDecreaseGini
## phq_dry_swallow         2.630653
## phq_dry_qol             4.765104
## phq_dry_speak           3.450197
## phq_dry_throat          1.754508
## phq_dry_tongue          4.178521
## phq_dry_lips            4.300532
## phq_dry_palate          3.558803
## cohort                  3.523465
imp = data.frame(caret::varImp(fit))
imp$factor = rownames(imp)
imp = imp[order(-imp$Overall),] 
ordering = unique(imp$factor)
imp$factor = as.factor(as.character(imp$factor))
imp$factor  <- factor(imp$factor, levels = ordering)

p = ggplot(imp, aes(factor, Overall)) + geom_point() + coord_flip() + theme_classic() + 
  xlab("Overall")
p

ggsave(p, file="~/Dropbox/oral-clinical-data-analysis-ms/FigureS5.png", device="png", height = 6, width = 8)

Validate the lm using a data subset defined by the older control subjects

Look at DMFS - over 40

#For teeth 7, 8, 22, and 27 the error 0 or 1 probability occurred; 
tooth.model = tooth.model[!(tooth.model %in% c(1, 16, 17, 32))]
caries.df <- dplyr::select(caries_map, c("DMFS.bytooth", "cohort", "age", "uwsfr", "tooth")) %>%
  filter(., !(tooth %in% c(1, 16, 17, 32) ))
caries.df = subset(caries.df, cohort=="Control" & age >=40 | cohort=="Low Flow")
cohort = caries.df$cohort
tooth = caries.df$tooth
caries.df$cohort = NULL
caries.df.scaled = data.frame(scale(caries.df, center=TRUE, scale=TRUE))
caries.df.scaled$tooth = tooth
caries.df.scaled$cohort = cohort


#dmfs.model = 'DMFS.bytooth ~  cohort + uwsfr  + age'

dmfs.model = 'DMFS.bytooth ~ cohort + uwsfr + cohort:uwsfr + age'
df = get_spltmap_out(df=caries.df.scaled, 
                        myvector=as.vector(caries.df.scaled$tooth), 
                        model=dmfs.model)
                        df = define_tooth_class(df)
                        
#adjust the p-values for multiple testing
df$p.adj = p.adjust(df$p.value, method="BH", n=length(df$p.value))
df$Significance = ifelse(df$p.adj < 0.05, "Yes", "No")
#combine coefs and confs and filter out intercept
#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the DMFS Model
p1 = plot_age(df, model="DMFS")
p2 = plot_cohort(df, model="DMFS")
p3 = plot_flow(df, model="DMFS")
p4 = plot_interaction(df, model="DMFS")

Figure3A <- ggarrange((p1 + theme(legend.position = "none")),
                       (p2 + theme(legend.position = "none")),
                       (p3 + theme(legend.position = "none")),
                       (p4 + theme(legend.position = "none")), ncol = 4)

Set up CAL model - over 40

perio.df <- dplyr::select(perio_map, c("cal", "pd","gm.cej","bop",
                                       "cohort", "age", "uwsfr", "tooth")) %>%
            filter(., tooth != 1 & tooth != 16 & tooth != 17 & tooth != 32)
perio.df = subset(perio.df, cohort=="Control" & age >=40 | cohort=="Low Flow")

#transformations
#perio.df$cal   = orderNorm(perio.df$cal)$x.t #normal
perio.df.scaled = data.frame(scale(perio.df, center=TRUE, scale=TRUE))


cal.model = 'cal_orderNorm ~ cohort + uwsfr + cohort:uwsfr + age'
mycoefficients = get_spltmap_out(df=perio.df, 
                        myvector=as.vector(perio.df$tooth), 
                        model=cal.model)
                        df = define_tooth_class(mycoefficients)

#add factor to facet by
df$terms_f <- factor(df$term, levels=c("Age", "Cohort", "UWS-FR", "Cohort:UWS-FR"))

#Plot results for the CAL Model
p1 = plot_age(df, model="CAL")
p2 = plot_cohort(df, model="CAL")
p3 = plot_flow(df, model="CAL")
p4 = plot_interaction(df, model="CAL")

Figure3B <- ggarrange((p1 + theme(legend.position = "none")),
                       (p2 + theme(legend.position = "none")),
                       (p3 + theme(legend.position = "none")),
                       (p4 + theme(legend.position = "none")), ncol = 4)

Figure3B